{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Sample code for Comparing NILM algorithms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from __future__ import print_function, division\n",
    "import time\n",
    "from matplotlib import rcParams\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import warnings\n",
    "from six import iteritems\n",
    "\n",
    "warnings.filterwarnings('ignore')\n",
    "%matplotlib inline\n",
    "\n",
    "rcParams['figure.figsize'] = (13, 6)\n",
    "\n",
    "from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore\n",
    "from nilmtk.disaggregate import CombinatorialOptimisation, FHMM"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Dividing data into train and test set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "train = DataSet('/data/REDD/redd.h5')\n",
    "test = DataSet('/data/REDD/redd.h5')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let us use building 1 for demo purposes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "building = 1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's split data at April 30th"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "train.set_window(end=\"30-4-2011\")\n",
    "test.set_window(start=\"30-4-2011\")\n",
    "\n",
    "\n",
    "train_elec = train.buildings[1].elec\n",
    "test_elec = test.buildings[1].elec"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Selecting top-5 appliances"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "15/16 MeterGroup(meters==19, building=1, dataset='REDD', appliances=[Appliance(type='unknown', instance=2)])e=1)])ce=1)])\n",
      "  ElecMeter(instance=3, building=1, dataset='REDD', appliances=[Appliance(type='electric oven', instance=1)])\n",
      "  ElecMeter(instance=4, building=1, dataset='REDD', appliances=[Appliance(type='electric oven', instance=1)])\n",
      "16/16 MeterGroup(meters= for ElecMeterID(instance=3, building=1, dataset='REDD') ...    total_energy for ElecMeterID(instance=4, building=1, dataset='REDD') ...   \n",
      "  ElecMeter(instance=10, building=1, dataset='REDD', appliances=[Appliance(type='washer dryer', instance=1)])\n",
      "  ElecMeter(instance=20, building=1, dataset='REDD', appliances=[Appliance(type='washer dryer', instance=1)])\n",
      "Calculating total_energy for ElecMeterID(instance=20, building=1, dataset='REDD') ...   "
     ]
    }
   ],
   "source": [
    "top_5_train_elec = train_elec.submeters().select_top_k(k=5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Training and disaggregation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def predict(clf, test_elec, sample_period, timezone):\n",
    "    pred = {}\n",
    "    gt= {}\n",
    "\n",
    "    for i, chunk in enumerate(test_elec.mains().load(sample_period=sample_period)):\n",
    "        chunk_drop_na = chunk.dropna()\n",
    "        pred[i] = clf.disaggregate_chunk(chunk_drop_na)\n",
    "        gt[i]={}\n",
    "\n",
    "        for meter in test_elec.submeters().meters:\n",
    "            # Only use the meters that we trained on (this saves time!)    \n",
    "            gt[i][meter] = next(meter.load(sample_period=sample_period))\n",
    "        gt[i] = pd.DataFrame({k:v.squeeze() for k,v in iteritems(gt[i])}, index=next(iter(gt[i].values())).index).dropna()\n",
    "        \n",
    "    # If everything can fit in memory\n",
    "    gt_overall = pd.concat(gt)\n",
    "    gt_overall.index = gt_overall.index.droplevel()\n",
    "    pred_overall = pd.concat(pred)\n",
    "    pred_overall.index = pred_overall.index.droplevel()\n",
    "\n",
    "    # Having the same order of columns\n",
    "    gt_overall = gt_overall[pred_overall.columns]\n",
    "    \n",
    "    #Intersection of index\n",
    "    gt_index_utc = gt_overall.index.tz_convert(\"UTC\")\n",
    "    pred_index_utc = pred_overall.index.tz_convert(\"UTC\")\n",
    "    common_index_utc = gt_index_utc.intersection(pred_index_utc)\n",
    "    \n",
    "    \n",
    "    common_index_local = common_index_utc.tz_convert(timezone)\n",
    "    gt_overall = gt_overall.ix[common_index_local]\n",
    "    pred_overall = pred_overall.ix[common_index_local]\n",
    "    appliance_labels = [m.label() for m in gt_overall.columns.values]\n",
    "    gt_overall.columns = appliance_labels\n",
    "    pred_overall.columns = appliance_labels\n",
    "    return gt_overall, pred_overall"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "********************\n",
      "CO\n",
      "********************\n",
      "Training model for submeter 'ElecMeter(instance=11, building=1, dataset='REDD', appliances=[Appliance(type='microwave', instance=1)])'\n",
      "Training model for submeter 'ElecMeter(instance=9, building=1, dataset='REDD', appliances=[Appliance(type='light', instance=1)])'\n",
      "Training model for submeter 'ElecMeter(instance=6, building=1, dataset='REDD', appliances=[Appliance(type='dish washer', instance=1)])'\n",
      "Training model for submeter 'ElecMeter(instance=5, building=1, dataset='REDD', appliances=[Appliance(type='fridge', instance=1)])'\n",
      "Training model for submeter 'ElecMeter(instance=8, building=1, dataset='REDD', appliances=[Appliance(type='sockets', instance=2)])'\n",
      "Done training!\n",
      "Loading data for meter ElecMeterID(instance=2, building=1, dataset='REDD')     \n",
      "Done loading data all meters for this chunk.\n",
      "Estimating power demand for 'ElecMeter(instance=11, building=1, dataset='REDD', appliances=[Appliance(type='microwave', instance=1)])'\n",
      "Estimating power demand for 'ElecMeter(instance=9, building=1, dataset='REDD', appliances=[Appliance(type='light', instance=1)])'\n",
      "Estimating power demand for 'ElecMeter(instance=6, building=1, dataset='REDD', appliances=[Appliance(type='dish washer', instance=1)])'\n",
      "Estimating power demand for 'ElecMeter(instance=5, building=1, dataset='REDD', appliances=[Appliance(type='fridge', instance=1)])'\n",
      "Estimating power demand for 'ElecMeter(instance=8, building=1, dataset='REDD', appliances=[Appliance(type='sockets', instance=2)])'\n",
      "Loading data for meter ElecMeterID(instance=4, building=1, dataset='REDD')     \n",
      "Done loading data all meters for this chunk.\n",
      "Loading data for meter ElecMeterID(instance=20, building=1, dataset='REDD')     \n",
      "Done loading data all meters for this chunk.\n",
      "********************\n",
      "FHMM\n",
      "********************\n",
      "Training model for submeter 'ElecMeter(instance=11, building=1, dataset='REDD', appliances=[Appliance(type='microwave', instance=1)])'\n",
      "Training model for submeter 'ElecMeter(instance=9, building=1, dataset='REDD', appliances=[Appliance(type='light', instance=1)])'\n",
      "Training model for submeter 'ElecMeter(instance=6, building=1, dataset='REDD', appliances=[Appliance(type='dish washer', instance=1)])'\n",
      "Training model for submeter 'ElecMeter(instance=5, building=1, dataset='REDD', appliances=[Appliance(type='fridge', instance=1)])'\n",
      "Training model for submeter 'ElecMeter(instance=8, building=1, dataset='REDD', appliances=[Appliance(type='sockets', instance=2)])'\n",
      "Loading data for meter ElecMeterID(instance=2, building=1, dataset='REDD')     \n",
      "Done loading data all meters for this chunk.\n",
      "Loading data for meter ElecMeterID(instance=4, building=1, dataset='REDD')     \n",
      "Done loading data all meters for this chunk.\n",
      "Loading data for meter ElecMeterID(instance=20, building=1, dataset='REDD')     \n",
      "Done loading data all meters for this chunk.\n"
     ]
    }
   ],
   "source": [
    "classifiers = {'CO':CombinatorialOptimisation(), 'FHMM':FHMM()}\n",
    "predictions = {}\n",
    "sample_period = 120\n",
    "for clf_name, clf in classifiers.items():\n",
    "    print(\"*\"*20)\n",
    "    print(clf_name)\n",
    "    print(\"*\" *20)\n",
    "    clf.train(top_5_train_elec, sample_period=sample_period)\n",
    "    gt, predictions[clf_name] = predict(clf, test_elec, 120, train.metadata['timezone'])\n",
    "   \n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_rmse(gt, pred):\n",
    "    from sklearn.metrics import mean_squared_error\n",
    "    rms_error = {}\n",
    "    for appliance in gt.columns:\n",
    "        rms_error[appliance] = np.sqrt(mean_squared_error(gt[appliance], pred[appliance]))\n",
    "    return pd.Series(rms_error)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "rmse = {}\n",
    "for clf_name in classifiers.keys():\n",
    "    rmse[clf_name] = compute_rmse(gt, predictions[clf_name])\n",
    "rmse = pd.DataFrame(rmse)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CO</th>\n",
       "      <th>FHMM</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Dish washer</th>\n",
       "      <td>222.076767</td>\n",
       "      <td>183.716326</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Fridge</th>\n",
       "      <td>189.811752</td>\n",
       "      <td>165.582688</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Light</th>\n",
       "      <td>122.587723</td>\n",
       "      <td>92.281793</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Microwave</th>\n",
       "      <td>192.521194</td>\n",
       "      <td>177.342418</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sockets</th>\n",
       "      <td>28.782600</td>\n",
       "      <td>47.876566</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     CO        FHMM\n",
       "Dish washer  222.076767  183.716326\n",
       "Fridge       189.811752  165.582688\n",
       "Light        122.587723   92.281793\n",
       "Microwave    192.521194  177.342418\n",
       "Sockets       28.782600   47.876566"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rmse"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
